################################################################################
# Title   : Replication Code — Union Spillovers Paper
# Project : The Impact of Unions on Non-union Wage Setting: Threats and Bargaining
# Authors : David A. Green, Ben M. Sand, Iain G. Snoddy, Jeanne Tschopp
# Output  : Figure 5
# Date    : August 2025
################################################################################

# Use the light theme as the session-wide ggplot2 default
theme_set(theme_light())

# Make sure the figure output folder exists before anything is written to it
if (!dir.exists(figs)) {
  dir.create(figs, recursive = TRUE)
}

# Build the plotting data for Figure 5.
#
# Reads the group-level file, keeps the periods coded 1-4 in `year2`, pairs
# the subgroups in `grp` into comparison groups, and differences `Wage` and
# `CF` within each group-year pair (second subgroup minus first subgroup).
# The result has one row per group, year and series (`var` is "Wage" or
# "CF"); `d` is the Wage gap minus the CF gap for that group and year.
dat <- read_dta(paste0(wd,"/ineq_all_grp_v1.dta")) %>%
  filter(year2 < 5) %>%
  mutate(
    # label the four retained periods by calendar year
    year = case_when(
      year2 == 1 ~ "1980",
      year2 == 2 ~ "1990",
      year2 == 3 ~ "2000",
      year2 == 4 ~ "2010"
    ),
    # comparison group that each subgroup belongs to
    group = case_when(
      grp %in% c("men10", "women10") ~ "Gender gap",
      grp %in% c("low10", "high10") ~ "Post-secondary premium",
      grp %in% c("LowMen10", "HighMen10") ~ "Men: Post-secondary premium",
      grp %in% c("LowWomen10", "HighWomen10") ~ "Women: Post-secondary premium"
    ),
    # position inside the pair: "high"/"women" subgroups sort second, so each
    # gap below is high minus low, or women minus men
    order = case_when(
      str_detect(grp, "igh") ~ 2,
      str_detect(grp, "ow") ~ 1,
      str_detect(grp, "women") ~ 2,
      .default = 1
    )
  ) %>%
  arrange(group, year, order) %>%
  group_by(group, year) %>%
  # within each group-year pair: second subgroup minus first subgroup
  mutate(across(c(Wage, CF), ~ .x - lag(.x))) %>%
  ungroup() %>%
  # the first subgroup of every pair has no lag, so its difference is NA
  filter(!is.na(Wage)) %>%
  mutate(
    # `year` is converted only after ungroup(): it is a grouping column above,
    # and dplyr refuses to modify a grouping column inside a grouped mutate()
    year = as.Date(ISOdate(year, 1, 1)),
    d = Wage - CF
  ) %>%
  select(Wage, CF, year, group, d) %>%
  # long format: one row per series (Wage / CF)
  pivot_longer(cols = c(Wage, CF), names_to = "var", values_to = "value")


# Plot-ready copy of `dat`: "Gender gap" and "Post-secondary premium" are moved
# to the front of the `group` factor levels, and `year` is coerced to Date.
df <- dat %>%
  mutate(
    group = fct_relevel(group, "Gender gap", "Post-secondary premium"),
    year = as.Date(year)
  )

# Annotation data for the last plotted year (2010-01-01), one row per group:
# the difference `d` to print, the end points of the arrow, and the height at
# which the label is placed.
diff_labels <- df %>%
  filter(var %in% c("Wage", "CF")) %>%
  filter(year == as.Date("2010-01-01")) %>%
  group_by(group) %>%
  summarise(
    # non-missing difference value carried on the group's rows
    d = unique(d[!is.na(d)]),
    # arrow end points, pulled 0.005 inside the lower and upper series values
    y_min = min(value) + 0.005,
    y_max = max(value) - 0.005,
    # label height: mean of the plotted values, shifted down by 0.001
    midpoint = mean(value) - 0.001
  )

# Figure 5: observed ("Wage") and counterfactual ("CF") log wage differentials
# over time, one panel per comparison group, annotated with the 2010
# difference between the two series.
ggplot(df, aes(x = year, y = value, colour = var, linetype = var, shape = var)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ group, scales = "free_y", shrink = TRUE) +

  # Text label showing the 2010 difference `d`, placed near the midpoint of
  # the two series and pushed left of the final year by `hjust`
  geom_text(
    data = diff_labels,
    aes(
      x = as.Date("2010-01-01"),
      y = midpoint,
      label = paste("", round(d, 3))
    ),
    inherit.aes = FALSE,
    vjust = 0,
    hjust = 1.75,
    size = 2.5
  ) +

  # Double-headed vertical arrow at 2009 spanning the gap between the series.
  # The "Women: Post-secondary premium" panel is excluded by name
  # NOTE(review): presumably because its gap is too small for the arrow
  # (y_min/y_max are each pulled in by 0.005) -- confirm.
  geom_segment(
    data = diff_labels %>% filter(group != "Women: Post-secondary premium"),
    aes(
      x = as.Date("2009-01-01"), xend = as.Date("2009-01-01"),
      y = y_min, yend = y_max
    ),
    inherit.aes = FALSE,
    arrow = arrow(length = unit(0.2, "cm"), ends = "both"),
    colour = "grey50"
  ) +

  # Colour, linetype and shape get the same (absent) title, breaks and labels
  # so that they are drawn as a single combined legend
  scale_colour_discrete(
    name = NULL,
    breaks = c("Wage", "CF"),
    labels = c("Wage" = "Observed", "CF" = "Counterfactual")
  ) +
  scale_linetype_discrete(
    name = NULL,
    breaks = c("Wage", "CF"),
    labels = c("Wage" = "Observed", "CF" = "Counterfactual")
  ) +
  scale_shape_discrete(
    name = NULL,
    breaks = c("Wage", "CF"),
    labels = c("Wage" = "Observed", "CF" = "Counterfactual")
  ) +
  guides(
    colour = guide_legend(nrow = 2) # stack the two legend entries in two rows
  ) +

  # No legend title is set here: every scale above uses `name = NULL`, which
  # takes precedence over a title given through labs()
  labs(
    x = "Year",
    y = "Log wage differential"
  ) +

  theme_minimal() +
  theme(
    legend.background = element_rect(fill = "white", color = "black"),
    # Legend inside the plotting area, anchored at the bottom-right corner.
    # NOTE(review): a numeric `legend.position` is deprecated from ggplot2
    # 3.5.0 in favour of `legend.position.inside`; kept as-is so the script
    # still runs on older ggplot2 versions.
    legend.position = c(1, 0),
    legend.justification = c(1, 0),
    legend.key.width = unit(2, "lines"),
    legend.key.height = unit(.75, "lines")
  )

# Write the most recently created plot to the figures folder as a PDF
ggsave(
  filename = paste0(figs,"/Figure_5.pdf"),
  plot = last_plot(),
  width = 8.5,
  height = 4.5,
  units = "in"
)